Purpose of analysis
Enhanced match data to investigate changes determined by switching from Centrally Held Market Extension product to the Location Plus product.

ME Locations [ME Locations] (https://autotrader.eu.looker.com/explore/customer_performance/retailer_performance?qid=UOGyp76yoNhXo6N30w1V43&toggle=dat,fil,vis,vse) ME Locations

Currently locations are closely grouped together in the South East region of the UK.

Me Product Timeline
[ME Product Timeline] (https://autotrader.eu.looker.com/explore/customer_performance/retailer_performance?qid=EcNifiWF2u6fyJJYnUxWE7&toggle=fil,vis) ME Product Timeline
Above is a timeline of stock on the Market Extension, centrally held stock on ‘DCC Online’ until June 2021 then moving onto the Location Plus product.

VRM Example - Direct Match vs ME Match Current direct match does not properly measure market extension, a vrm could be seen at multiple locations if on the location plus product. The example below shows that this specific vrm should have a direct match as it was viewed at a different location (due to ME) to where it was sold at. ——————————————-

sold vrm buyer postcode district sold did viewed vrm viewed did viewed date me vrm match flag
GU15MXO DT10 10028897 LB62NKZ 10027421 2021-07-01 0
GU15MXO DT10 10028897 ML62AUR 10027421 2021-07-01 0
GU15MXO DT10 10028897 GU15MXO 10027421 2021-07-01 1
GU15MXO DT10 10028897 ML62AUR 10027421 2021-07-01 0
GU15MXO DT10 10028897 ML62AUR 10027421 2021-07-01 0

VRM Example Map View - Direct Match vs ME Match The map view below details the specific vrm example: it’s buyers location, viewed retailer location and sold retailer locations ——————————————–

vrm_example_map <- vrm_example %>%
  filter(me_vrm_match_flag == 1) %>%
  select(sold_vrm, sold_did, sold_did_lat, sold_did_lon, sold_did_postcode, buyyer_postcode_district, buyer_lat, buyer_lon, viewed_vrm, viewed_did, viewed_retailer_postcode, viewed_retailer_postcode_1, viewed_retailer_postcode_2, viewed_distance_between) 


worldmap = map_data('world')

ggplot() + 
  geom_polygon(data = worldmap, 
               aes(x = long, y = lat, 
                   group = group), 
               fill = 'gray90', 
               color = 'black') + 
  coord_fixed(ratio = 1.3, 
              xlim = c(-5,1), 
              ylim = c(50, 55)) + 
  theme_void() + 
  geom_point(data = vrm_example_map, aes(x = sold_did_lon, y = sold_did_lat,col = "sold location")) +
  geom_point(data = vrm_example_map, aes(x = viewed_retailer_postcode_2, y = viewed_retailer_postcode_1, col = "viewed location")) + 
  geom_point(data = vrm_example_map, aes(x = buyer_lon, y = buyer_lat,  col = "buyer location")) + theme(legend.position="right")

VRM Example Map View - Direct Match vs ME Match
The map view below details the specific vrm example: it’s buyers location, viewed retailer location and sold retailer locations.
direct_vs_me_matches_agg <- enhanced_match %>%
                            group_by(sold_vrm, sold_month, sold_make, type_of_stock) %>%
                            mutate(value_added_products = ifelse(Value.added.Products >0, 1,0),
                            matched_viewed_distance_between = ifelse( (me_vrm_match_flag + site_all_match_flag + group_all_match_flag) > 0, viewed_distance_between, NA)
                            )%>%
                 summarise(row_count = n(),
                      me_vrm_match_flag = max(me_vrm_match_flag, na.rm = TRUE),
                      direct_match_flag = max(vrm_match_flag, na.rm = TRUE))



direct_vs_me_matches <- direct_vs_me_matches_agg %>%
                        group_by(sold_month) %>%
                        summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
                         direct_matches =  sum(direct_match_flag, na.rm=TRUE))


direct_vs_me_matches_m <- melt(direct_vs_me_matches[,c('sold_month', 'me_vrm_matches', 'direct_matches')],id.vars = 1)



ggplot(direct_vs_me_matches_m,aes(x = sold_month,y = value)) + 
    geom_bar(aes(fill = variable),stat = "identity",position = "stack") + theme_minimal() + scale_fill_brewer() + labs(title = "Direct vs ME Matches") + labs(x = expression("Month"), y = "Number of Matches")

# Create a vrm level summary of the events level data for analysis
vrm_agg <- enhanced_match %>%
            group_by(sold_vrm, sold_month, sold_make, type_of_stock) %>%
            mutate(value_added_products = ifelse(Value.added.Products >0, 1,0),
           matched_viewed_distance_between = ifelse( (me_vrm_match_flag + site_all_match_flag + group_all_match_flag) > 0, viewed_distance_between, NA)
              )%>%
            summarise(row_count = n(),
                      me_vrm_match_flag = max(me_vrm_match_flag, na.rm = TRUE),
                      site_match_flag = max(site_all_match_flag, na.rm = TRUE),
                      group_match_flag = max(group_all_match_flag, na.rm = TRUE),
                      matched_viewed_distance_between = mean(matched_viewed_distance_between, na.rm=T),
                      profit_margin = mean(Chassis.Margin, na.rm=T),
                      value_added_products_flag = max(value_added_products)
                      ) %>%
            mutate(site_match_flag = ifelse(me_vrm_match_flag == 1 ,0, site_match_flag),
                  group_match_flag = ifelse( (site_match_flag == 1) | (me_vrm_match_flag == 1) ,0, group_match_flag),
                  no_match_flag = ifelse((me_vrm_match_flag + site_match_flag + group_match_flag) >0, 0, 1),
                  )

# replace NA with no match as this is a legit banding - if no match then ther is no distanc, so still want to include this in charts
vrm_agg$matched_viewed_distance_between[is.nan(vrm_agg$matched_viewed_distance_between)]<- -1

# create distance banding and turn into factor so shows in right order on charts
vrm_agg <- vrm_agg %>%
        mutate(viewed_distance_banding = ifelse(matched_viewed_distance_between == -1, "No match",
                                         ifelse(matched_viewed_distance_between <10, "less than 10 miles",
                                         ifelse(matched_viewed_distance_between <20, "less than 20 miles",
                                         ifelse(matched_viewed_distance_between <50, "less than 50 miles",
                                         ifelse(matched_viewed_distance_between <100, "less than 100 miles",
                                         ifelse(matched_viewed_distance_between >=100, "100 or more miles" 
                                                )))))))

#  convert to factore with levels
vrm_agg$viewed_distance_banding <- factor(vrm_agg$viewed_distance_banding, levels = c("less than 10 miles", "less than 20 miles", "less than 50 miles", "less than 100 miles", "100 or more miles", 'No match'), ordered = TRUE)


# change back ***SKANKY, NEED TO FIND BETTER WASY TO DO THIS!!!! ***
vrm_agg$matched_viewed_distance_between[vrm_agg$matched_viewed_distance_between == -1]<- NaN
How many sales did Auto Trader influence (split by the usual VRM, Site, Group level)
Sales increased in July 2021 for the DCC group as they moved onto the location plus product, but sales dropped down to previous months levels in August.
matches <- vrm_agg %>%
          group_by(sold_month) %>%
          summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
                    site_matches =  sum(site_match_flag, na.rm=TRUE),
                    group_matches = sum(group_match_flag, na.rm = TRUE),
                    no_matches = sum(no_match_flag, na.rm = TRUE))



matches_m <- melt(matches[,c('sold_month', 'no_matches', 'group_matches','site_matches', 'me_vrm_matches')],id.vars = 1)


ggplot(matches_m,aes(x = sold_month,y = value)) + 
    geom_bar(aes(fill = variable),stat = "identity",position = "stack") + theme_minimal() + scale_fill_brewer() + labs(title = "Autotrader Influence") + labs(x = expression("Month"), y = "Influence")

How far away is the buyer to the seller?
There does not look to be a large change in the distance between buyer and seller but this is likely due to the fact that DCC have locations grouped close together.
# create distance aggregation to show how distance buyer travels changes over time
distance_matches <- vrm_agg %>%
                      filter(!is.na(viewed_distance_banding)) %>%
                      group_by(sold_month, viewed_distance_banding) %>%
                      summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
                                site_matches =  sum(site_match_flag, na.rm=TRUE),
                                group_matches = sum(group_match_flag, na.rm = TRUE),
                                no_matches = sum(no_match_flag, na.rm = TRUE))

# melt to in right format for plotting

distance_matches_m <- melt(distance_matches[,c('sold_month', 'viewed_distance_banding', 'me_vrm_matches', 'site_matches', 'group_matches', 'no_matches')],id.vars = c(1,2))



# create barplot

tmp <- distance_matches_m %>% group_by(sold_month) %>% summarise(monthly_total = sum(value, na.rm=T))
distance_matches_m_agg <- left_join(distance_matches_m, tmp, by = c('sold_month'), copy=T)
distance_matches_m_agg <- distance_matches_m_agg %>%
                            ungroup() %>%
                            group_by(sold_month, viewed_distance_banding) %>% 
                              summarise(percent = sum(value, na.rm=T)/monthly_total)

ggplot(distance_matches_m_agg,aes(x = sold_month,y = percent)) + 
    geom_bar(aes(fill = viewed_distance_banding),stat = "identity",position = "fill") + theme_minimal() + scale_fill_brewer() + labs(title = "% of sales by distance banding") + labs(x = expression("Month"), y = "Influence") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

#need to sort labelling out ....geom_text(aes(label=distance_matches_m_agg$percent, y=distance_matches_m_agg$percent/100)) + 

#forcats::fct_rev
Profit by Distance Sold - July 2021 Data
Enriched profit data for DCC was only available for July 2021, the data shows that average profit does not seem to fluctuate much when the buyer is further away from the retailer. Most sales for DCC came from less than 100 miles away from the site location.
profit_agg <- vrm_agg %>%
            filter(no_match_flag == 0) %>%
            group_by(viewed_distance_banding) %>%
            summarise(row_count = n(),
                      distinct_sales = n_distinct(sold_vrm),
                      avg_profit = mean(profit_margin, na.rm = TRUE))


#hist(vrm_agg$profit_margin, col = 'skyblue3', breaks = 20, xlab = 'profit margin', main = "Avg profit distribution")
par(mai=c(1,2,1,1))
barplot(profit_agg$avg_profit, names.arg = profit_agg$viewed_distance_banding, col="dodgerblue3",
        density=c(30,10,20,35,15,25),angle=60, main = "Avg profit by distance", xlab = "Mean Profit(£)" , cex.names=0.7, horiz = TRUE, las=1)

par(mai=c(1,2,1,1))
barplot(profit_agg$distinct_sales, names.arg = profit_agg$viewed_distance_banding, col="dodgerblue3",
        density=c(30,10,20,35,15,25),angle=60, main = "Amount of sales by distance banding", xlab = "Number of Sales" , cex.names=0.7, horiz = TRUE, las=1)

What is the total value/profit of cars sold through Market Extension - product add ons
Although less sales are seen with finance add ons in July 2021 those that were sold with add ons were the most likely to come from 50 - 100 miles away from the retailer location.
has_finance <- vrm_agg %>%
            group_by(viewed_distance_banding) %>%
            filter(value_added_products_flag == 1) %>%
            summarise(distinct_sales = n_distinct(sold_vrm))


no_finance <- vrm_agg %>%
            group_by(viewed_distance_banding) %>%
            filter(value_added_products_flag == 0) %>%
            summarise(distinct_sales = n_distinct(sold_vrm))


par(mfrow=c(1,2)) 
barplot(has_finance$distinct_sales, names.arg = has_finance$viewed_distance_banding, col = 'steelblue', main = "Add Ons", xlab = "Distinct Sales",cex.names=0.7, horiz = TRUE, las=1)
barplot(no_finance$distinct_sales, names.arg = no_finance$viewed_distance_banding, col = 'steelblue', main = "No Add Ons", xlab = "Distinct Sales", cex.names=0.7, horiz = TRUE, las=1)

par(mfrow=c(1,1))
How does distance between buyer & seller vary by sold vehicle type (e.g. standard vs. premium)?
premium_standard_matches <- vrm_agg %>%
                      filter(type_of_stock %in% c('standard', 'premium')) %>%
                      group_by(sold_month, viewed_distance_banding, type_of_stock) %>%
                      summarise(me_vrm_matches = sum(me_vrm_match_flag, na.rm= TRUE),
                                site_matches =  sum(site_match_flag, na.rm=TRUE),
                                group_matches = sum(group_match_flag, na.rm = TRUE),
                                no_matches = sum(no_match_flag, na.rm = TRUE))

# melt to in right format for plotting
premium_standard_matches_m <- melt(premium_standard_matches[,c('sold_month','viewed_distance_banding', 'type_of_stock', 'no_matches', 'group_matches','site_matches', 'me_vrm_matches')],id.vars = c(1,2,3))

# create barplot
ggplot(premium_standard_matches_m,aes(x = sold_month,y = value)) + 
    geom_bar(aes(fill = viewed_distance_banding),stat = "identity",position = "fill") + theme_minimal() + scale_fill_brewer() + labs(title = "Distance Banding") + labs(x = expression("Month"), y = "Influence")+ facet_grid(cols = vars(type_of_stock))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

How does influence rate between buyer & seller vary by sold vehicle type (e.g. standard vs. premium)?
ggplot(premium_standard_matches_m,aes(x = sold_month,y = value)) + 
    geom_bar(aes(fill = variable),stat = "identity",position = "fill") + theme_minimal() + scale_fill_brewer() + labs(title = "Autotrader Influence") + labs(x = expression("Month"), y = "Influence")+ facet_grid(cols = vars(type_of_stock))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())